# Load the board-games dataset from the project's data directory.
board_games <- readr::read_csv(
  file = "/cloud/project/data/board_games.csv"
)
## Rows: 10532 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): description, image, name, thumbnail, artist, category, compilation...
## dbl (10): game_id, max_players, max_playtime, min_age, min_players, min_play...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the games ordered from the largest max_playtime to the smallest.
board_games %>%
  arrange(desc(max_playtime))
## # A tibble: 10,532 × 22
## game_id description image max_players max_playtime min_age min_players
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 4815 This is a war g… //cf.g… 10 60000 14 8
## 2 46669 (from GMT websi… //cf.g… 4 17280 0 2
## 3 254 (from the back … //cf.g… 7 12000 14 2
## 4 6942 Introduction:&#… //cf.g… 4 12000 12 2
## 5 1499 (from ADG websi… //cf.g… 7 6000 12 2
## 6 5622 Pacific War is … //cf.g… 2 6000 16 2
## 7 38578 Age of Muskets … //cf.g… 6 6000 0 2
## 8 173504 The Greatest Da… //cf.g… 8 6000 12 2
## 9 5651 The Longest Day… //cf.g… 8 5400 12 2
## 10 13532 (from the box:)… //cf.g… 2 4500 12 2
## # … with 10,522 more rows, and 15 more variables: min_playtime <dbl>,
## # name <chr>, playing_time <dbl>, thumbnail <chr>, year_published <dbl>,
## # artist <chr>, category <chr>, compilation <chr>, designer <chr>,
## # expansion <chr>, family <chr>, mechanic <chr>, publisher <chr>,
## # average_rating <dbl>, users_rated <dbl>
# Bucket each game's playing_time into a labelled playtime_group column.
# The bucket labels treat playing_time as minutes.
#
# case_when() checks conditions top to bottom and uses the first one that is
# TRUE, so each branch only needs its upper bound. Writing the buckets this way
# leaves no gaps between them: a paired ">= 21 & <= 40" style would drop any
# fractional value (e.g. 20.5) into NA. A missing playing_time still yields NA
# because none of the comparisons evaluates to TRUE.
board_games <- board_games %>%
  mutate(
    playtime_group = case_when(
      playing_time < 1 ~ "N/A",
      playing_time <= 20 ~ "Under 20 minutes",
      playing_time <= 40 ~ "20-40 minutes",
      playing_time <= 60 ~ "40-60 minutes",
      playing_time <= 120 ~ "1-2 hours",
      playing_time <= 180 ~ "2-3 hours",
      playing_time <= 240 ~ "3-4 hours",
      playing_time <= 360 ~ "4-6 hours",
      playing_time > 360 ~ "Over 6 hours"
    )
  )
# Tally the games in each playtime group, most common group first.
board_games %>%
  count(playtime_group, sort = TRUE)
## # A tibble: 9 × 2
## playtime_group n
## <chr> <int>
## 1 40-60 minutes 2819
## 2 20-40 minutes 2210
## 3 Under 20 minutes 1972
## 4 1-2 hours 1896
## 5 2-3 hours 528
## 6 N/A 350
## 7 3-4 hours 344
## 8 4-6 hours 312
## 9 Over 6 hours 101
stacked barplot
Research questions: Does the distribution of playtime groups change with the recommended minimum age? Does it differ across game categories?